msg_tool\scripts\kirikiri\archive\xp3pack/
segmenter.rs

1use super::reader::Reader;
2use anyhow::Result;
3use fastcdc::v2020::StreamCDC;
4use std::io::Read;
5
/// Selects how file data is split into segments before packing.
#[derive(Debug, Clone, Copy)]
pub enum SegmenterConfig {
    /// Segmentation is disabled; the data is left as a single piece.
    None,
    /// Content-defined chunking via the FastCDC algorithm.
    FastCdc {
        /// Minimum chunk size, in bytes.
        min_size: u32,
        /// Target average chunk size, in bytes.
        avg_size: u32,
        /// Maximum chunk size, in bytes.
        max_size: u32,
    },
    /// Segments of a fixed length; the payload is the segment size in bytes.
    Fixed(usize),
}
20
21impl Default for SegmenterConfig {
22    fn default() -> Self {
23        SegmenterConfig::FastCdc {
24            min_size: 32 * 1024,
25            avg_size: 256 * 1024,
26            max_size: 8 * 1024 * 1024,
27        }
28    }
29}
30
31impl SegmenterConfig {
32    pub fn is_none(&self) -> bool {
33        matches!(self, SegmenterConfig::None)
34    }
35}
36
37/// A trait for strategies that split a byte slice into one or more segments.
38pub trait Segmenter {
39    fn segment<'a>(
40        &'a self,
41        data: &'a mut Reader,
42    ) -> Box<dyn Iterator<Item = Result<Vec<u8>>> + 'a>;
43}
44
/// A [`Segmenter`] that performs content-defined chunking with FastCDC.
///
/// The three sizes are forwarded unchanged to `StreamCDC::new` whenever a
/// reader is segmented.
#[derive(Debug, Clone, Copy)]
pub struct FastCdcSegmenter {
    /// Minimum chunk size handed to the chunker.
    min_size: u32,
    /// Average chunk size handed to the chunker.
    avg_size: u32,
    /// Maximum chunk size handed to the chunker.
    max_size: u32,
}
50
51impl Segmenter for FastCdcSegmenter {
52    fn segment<'a>(
53        &'a self,
54        data: &'a mut Reader,
55    ) -> Box<dyn Iterator<Item = Result<Vec<u8>>> + 'a> {
56        let cdc = StreamCDC::new(data, self.min_size, self.avg_size, self.max_size);
57        Box::new(cdc.map(|chunk| Ok(chunk?.data)))
58    }
59}
60
/// A [`Segmenter`] that cuts the input into segments of a fixed byte length.
#[derive(Debug, Clone, Copy)]
pub struct FixedSizeSegmenter {
    /// Length of every segment in bytes; only the last segment may be shorter.
    size: usize,
}
64
65impl Segmenter for FixedSizeSegmenter {
66    fn segment<'a>(
67        &'a self,
68        data: &'a mut Reader,
69    ) -> Box<dyn Iterator<Item = Result<Vec<u8>>> + 'a> {
70        let size = self.size;
71        let mut buf = vec![0; size];
72        Box::new(std::iter::from_fn(move || {
73            let nbuf = &mut buf;
74            let mut total_read = 0;
75            while total_read < size {
76                match data.read(&mut nbuf[total_read..]) {
77                    Ok(0) => break, // EOF
78                    Ok(n) => total_read += n,
79                    Err(e) => return Some(Err(e.into())),
80                }
81            }
82            if total_read == 0 {
83                None // No more data to read
84            } else {
85                Some(Ok(buf[..total_read].to_vec()))
86            }
87        }))
88    }
89}
90
91pub fn create_segmenter(config: SegmenterConfig) -> Option<Box<dyn Segmenter + Send + Sync>> {
92    match config {
93        SegmenterConfig::None => None,
94        SegmenterConfig::FastCdc {
95            min_size,
96            avg_size,
97            max_size,
98        } => Some(Box::new(FastCdcSegmenter {
99            min_size,
100            avg_size,
101            max_size,
102        })),
103        SegmenterConfig::Fixed(size) => Some(Box::new(FixedSizeSegmenter { size })),
104    }
105}